package com.trytech.mongoocrawler.server.xml;

import com.trytech.mongoocrawler.server.CrawlerConfig;
import com.trytech.mongoocrawler.server.common.db.CrawlerDataSource;
import com.trytech.mongoocrawler.server.common.exception.DataSourceInitException;
import com.trytech.mongoocrawler.server.exception.ConfigParseException;
import com.trytech.mongoocrawler.server.parser.HtmlParser;
import com.trytech.mongoocrawler.server.pipeline.AbstractPipeline;
import com.trytech.mongoocrawler.server.pipeline.PipelineProxy;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Builds an {@link XmlConfigBean} from a crawler XML configuration file.
 * The XML is parsed with dom4j.
 *
 * <p>The {@link SAXReader} is created lazily on the first call to
 * {@link #parse(String)} and reused afterwards; no synchronization is
 * performed around it.
 */
@Slf4j
public class XmlDocumentBuilder {
    // dom4j reader, created on first use by parse()
    private SAXReader saxParser;

    /**
     * Creates a new builder instance.
     *
     * @return a fresh {@code XmlDocumentBuilder}
     */
    public static XmlDocumentBuilder newInstance(){
        return new XmlDocumentBuilder();
    }

    /**
     * Reads the XML file at {@code path} and converts it into an {@link XmlConfigBean}.
     *
     * <p>Sections are processed in this order: {@code mode} (required),
     * {@code datasources} (required, every data source is instantiated and
     * initialised), {@code cache} (optional), every {@code crawler} element,
     * and {@code monitor} (optional).
     *
     * <p>Any failure other than a {@link DataSourceInitException} is logged
     * together with its cause and then reported to the caller as a new
     * {@link DataSourceInitException}. The remaining checked exceptions stay in
     * the signature for source compatibility with existing callers.
     *
     * @param path file system path of the XML configuration file
     * @return the populated configuration bean
     * @throws DataSourceInitException if the file cannot be read or parsed, if a
     *         required element/attribute is missing or malformed, or if a data
     *         source fails to initialise
     */
    public XmlConfigBean parse(String path) throws SAXException, IOException, DocumentException, DataSourceInitException {
        try {
            if (saxParser == null) {
                saxParser = new SAXReader();
            }
            Document doc = saxParser.read(new File(path));
            Element root = doc.getRootElement();

            XmlConfigBean xmlConfigBean = new XmlConfigBean();
            xmlConfigBean.setModeConfigBean(parseMode(root));
            parseDataSources(root, xmlConfigBean);
            parseCache(root, xmlConfigBean);
            parseCrawlers(root, xmlConfigBean);
            parseMonitor(root, xmlConfigBean);
            return xmlConfigBean;
        } catch (DataSourceInitException e) {
            // Already the type callers expect: keep whatever detail it carries.
            throw e;
        } catch (Exception e) {
            // Only the no-arg constructor of DataSourceInitException is used in this
            // file, so the root cause is logged here instead of being lost.
            log.error("解析配置文件失败: {}", path, e);
            throw new DataSourceInitException();
        }
    }

    /** Parses the required {@code mode} element and its optional {@code server} child. */
    private ModeConfigBean parseMode(Element root) throws ConfigParseException {
        Element crawlerModeNode = requiredChild(root, "mode");
        ModeConfigBean modeConfigBean = new ModeConfigBean();

        // Fall back to local (single machine) mode when the value is absent or not a number.
        int mode = CrawlerConfig.CrawlerMode.LOCAL_MODE.getValue();
        try {
            mode = Integer.parseInt(crawlerModeNode.attributeValue("value"));
        } catch (NumberFormatException e) {
            log.warn("爬虫的运行模式mode配置错误，请检查配置，默认使用单机运行模式");
        }
        modeConfigBean.setCrawlerMode(CrawlerConfig.CrawlerMode.paserMode(mode));

        // Port of the crawler server, only present when a <server> child is configured.
        Element serverElement = crawlerModeNode.element("server");
        if (serverElement != null) {
            modeConfigBean.setServerPort(requiredInt(serverElement.attributeValue("port"), "server port"));
        }
        return modeConfigBean;
    }

    /** Instantiates, configures and initialises every {@code datasource} and registers it on the bean. */
    private void parseDataSources(Element root, XmlConfigBean xmlConfigBean) throws Exception {
        Element databaseCollectionElement = requiredChild(root, "datasources");
        List<Element> datasourceElements = databaseCollectionElement.elements("datasource");
        for (Element datasourceElement : datasourceElements) {
            String datasourceName = datasourceElement.attributeValue("name");
            String datasourceClsName = requiredAttribute(datasourceElement, "class");

            // Resolve the class once and reuse it for instantiation and setCls().
            Class<?> datasourceCls = Class.forName(datasourceClsName);
            CrawlerDataSource crawlerDataSource = (CrawlerDataSource) datasourceCls.newInstance();
            crawlerDataSource.setName(datasourceName);
            crawlerDataSource.setCls(datasourceCls);

            List<Element> propertiesElements = datasourceElement.elements("property");
            Map<String, Object> databaseProperties = new HashMap<String, Object>();
            for (Element propertyElement : propertiesElements) {
                databaseProperties.put(
                        requiredAttribute(propertyElement, "name"),
                        requiredAttribute(propertyElement, "value"));
            }
            crawlerDataSource.setPropertiesMap(databaseProperties);
            crawlerDataSource.init();
            xmlConfigBean.setDataSource(datasourceName, crawlerDataSource);
        }
    }

    /** Registers the optional {@code cache} section; it is skipped when its port text is empty. */
    private void parseCache(Element root, XmlConfigBean xmlConfigBean) throws ConfigParseException {
        Element cacheElement = root.element("cache");
        if (cacheElement == null) {
            return;
        }
        String type = cacheElement.attributeValue("type");
        CacheXmlConfigBean cacheXmlConfigBean = CacheXmlConfigBean.getCacheBean(CacheXmlConfigBean.CacheType.getCacheType(type));

        String ip = requiredChild(cacheElement, "ip").getTextTrim();
        String portText = requiredChild(cacheElement, "port").getTextTrim();
        if (StringUtils.isNotEmpty(portText)) {
            cacheXmlConfigBean.setIp(ip);
            cacheXmlConfigBean.setPort(requiredInt(portText, "cache port"));
            xmlConfigBean.registerCache(cacheXmlConfigBean);
        } else {
            // Same outcome as before (no cache registered), but no longer silent.
            log.warn("cache节点的port为空，忽略cache配置");
        }
    }

    /** Builds and registers one {@link CrawlerXmlConfigBean} per {@code crawler} element. */
    private void parseCrawlers(Element root, XmlConfigBean xmlConfigBean) throws ConfigParseException {
        List<Element> crawlerElementList = root.elements("crawler");
        for (Element crawlerElement : crawlerElementList) {
            CrawlerXmlConfigBean crawlerXmlConfigBean = new CrawlerXmlConfigBean();

            String name = requiredChild(crawlerElement, "name").getTextTrim();
            crawlerXmlConfigBean.setName(name);
            crawlerXmlConfigBean.setStartUrl(requiredChild(crawlerElement, "starturl").getTextTrim());
            crawlerXmlConfigBean.setRunmode(
                    requiredInt(requiredChild(crawlerElement, "runmode").getTextTrim(), "runmode"));
            crawlerXmlConfigBean.setFetchtimeout(
                    requiredInt(requiredChild(crawlerElement, "fetchtimeout").getTextTrim(), "fetchtimeout"));
            crawlerXmlConfigBean.setUrlstoremode(requiredChild(crawlerElement, "urlstoremode").getTextTrim());

            // First parser: loading stays best-effort, the crawler is registered even without it.
            String firstparser = requiredAttribute(requiredChild(crawlerElement, "firstparser"), "class");
            Object parserInstance = instantiate(firstparser);
            if (parserInstance != null) {
                crawlerXmlConfigBean.setFirstparser((HtmlParser) parserInstance);
            }

            // Pipeline (storage): same best-effort policy as the first parser.
            String pipeline = requiredAttribute(requiredChild(crawlerElement, "pipeline"), "class");
            Object pipelineInstance = instantiate(pipeline);
            if (pipelineInstance != null) {
                crawlerXmlConfigBean.setPipeline(new PipelineProxy((AbstractPipeline) pipelineInstance));
            }

            xmlConfigBean.registerCrawler(name, crawlerXmlConfigBean);
        }
    }

    /** Reads the optional {@code monitor} section and stores its port on the bean. */
    private void parseMonitor(Element root, XmlConfigBean xmlConfigBean) throws ConfigParseException {
        Element monitorEle = root.element("monitor");
        if (monitorEle == null) {
            return;
        }
        MonitorConfigBean monitorConfigBean = new MonitorConfigBean();
        Element portEle = requiredChild(monitorEle, "port");
        monitorConfigBean.setPort(requiredInt(requiredAttribute(portEle, "value"), "monitor port"));
        xmlConfigBean.setMonitorConfigBean(monitorConfigBean);
    }

    /**
     * Loads {@code className} and creates an instance through its no-arg constructor.
     * Returns {@code null}, after logging the reason, when the class cannot be found,
     * instantiated or accessed. {@code Class.newInstance()} is kept on purpose so that
     * exceptions thrown by the constructor itself still propagate to the caller.
     */
    private static Object instantiate(String className) {
        try {
            return Class.forName(className).newInstance();
        } catch (ClassNotFoundException e) {
            log.warn("找不到类: {}", className, e);
        } catch (InstantiationException e) {
            log.warn("无法实例化类: {}", className, e);
        } catch (IllegalAccessException e) {
            log.warn("无法访问类的构造方法: {}", className, e);
        }
        return null;
    }

    /** Returns the child element called {@code name}, or fails with a message naming the missing node. */
    private static Element requiredChild(Element parent, String name) throws ConfigParseException {
        Element child = parent.element(name);
        if (child == null) {
            throw new ConfigParseException("没有配置" + name + "节点");
        }
        return child;
    }

    /** Returns the value of attribute {@code name}, or fails with a message naming the element and attribute. */
    private static String requiredAttribute(Element element, String name) throws ConfigParseException {
        String value = element.attributeValue(name);
        if (value == null) {
            throw new ConfigParseException("节点" + element.getName() + "缺少" + name + "属性");
        }
        return value;
    }

    /** Parses {@code raw} as an int, or fails with a message that names the offending setting and value. */
    private static int requiredInt(String raw, String description) throws ConfigParseException {
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            // The raw input is the only information the NumberFormatException carries.
            throw new ConfigParseException(description + "不是合法的整数: " + raw);
        }
    }
}
